library(phyloseq); packageVersion("phyloseq")
#> [1] '1.46.0'
library(ggplot2); packageVersion("ggplot2")
#> [1] '3.5.1'
library(readr); packageVersion("readr")
#> [1] '2.1.5'
library(purrr); packageVersion("purrr")
#> [1] '1.0.2'
library(furrr); packageVersion("furrr")
#> Loading required package: future
#> Warning: package 'future' was built under R version 4.3.3
#> [1] '0.3.1'
library(dplyr); packageVersion("dplyr")
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#> [1] '1.1.4'
library(stringr); packageVersion("stringr")
#> [1] '1.5.1'
library(metacoder); packageVersion("metacoder")
#> This is metacoder version 0.3.7 (stable)
#>
#> Attaching package: 'metacoder'
#> The following object is masked from 'package:ggplot2':
#>
#> map_data
#> The following object is masked from 'package:phyloseq':
#>
#> filter_taxa
#> [1] '0.3.7'
library(data.table); packageVersion("data.table")
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
#> [1] '1.15.4'
library(decontam); packageVersion("decontam")
#> [1] '1.22.0'
library(tidyr); packageVersion("tidyr")
#> [1] '1.3.1'
library(purrr); packageVersion("purrr")
#> [1] '1.0.2'
library(forcats); packageVersion("forcats")
#> [1] '1.0.0'
library(Biostrings); packageVersion("Biostrings")
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:dplyr':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, aperm, append, as.data.frame, basename, cbind,
#> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
#> get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
#> match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
#> Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
#> table, tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#> Loading required package: stats4
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:tidyr':
#>
#> expand
#> The following objects are masked from 'package:data.table':
#>
#> first, second
#> The following objects are masked from 'package:dplyr':
#>
#> first, rename
#> The following object is masked from 'package:utils':
#>
#> findMatches
#> The following objects are masked from 'package:base':
#>
#> expand.grid, I, unname
#> Loading required package: IRanges
#>
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:data.table':
#>
#> shift
#> The following object is masked from 'package:metacoder':
#>
#> reverse
#> The following objects are masked from 'package:dplyr':
#>
#> collapse, desc, slice
#> The following object is masked from 'package:purrr':
#>
#> reduce
#> The following object is masked from 'package:phyloseq':
#>
#> distance
#> Loading required package: XVector
#>
#> Attaching package: 'XVector'
#> The following object is masked from 'package:purrr':
#>
#> compact
#> Loading required package: GenomeInfoDb
#> Warning: package 'GenomeInfoDb' was built under R version 4.3.3
#>
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:metacoder':
#>
#> complement
#> The following object is masked from 'package:base':
#>
#> strsplit
#> [1] '2.70.3'
library(magick); packageVersion("magick")
#> Warning: package 'magick' was built under R version 4.3.3
#> Linking to ImageMagick 6.9.12.93
#> Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
#> Disabled features: fftw, ghostscript, x11
#> [1] '2.8.4'
library(pdftools);packageVersion("pdftools")
#> Warning: package 'pdftools' was built under R version 4.3.3
#> Using poppler version 23.04.0
#> [1] '3.4.1'
library(vegan); packageVersion("vegan")
#> Warning: package 'vegan' was built under R version 4.3.3
#> Loading required package: permute
#> Loading required package: lattice
#> This is vegan 2.6-6.1
#> [1] '2.6.6.1'
library(grid); packageVersion("grid")
#>
#> Attaching package: 'grid'
#> The following object is masked from 'package:Biostrings':
#>
#> pattern
#> [1] '4.3.2'
knitr::opts_knit$set(root.dir = "~/benchmark_demulticoder")
seed <- 1
set.seed(seed)
taxa_its<- read.table("standard_workflow/its/data/its_taxa.out", sep="\t", stringsAsFactors=F,header=T)
taxa_filt_its <- taxa_its %>%
mutate(across(starts_with("tax."), ~ ifelse(get(str_replace(cur_column(), "tax.", "boot.")) < 0, NA, .))) %>%
set_names(str_replace(names(.), "tax.", ""))
remove_prefix_its <- function(data) {
data <- gsub("[a-z]__", "", data)
return(data)
}
taxa_filt_its[] <- lapply(taxa_filt_its, remove_prefix_its)
revise_species_its <- function(genus, species) {
revise_species_its <- ifelse(is.na(species), NA, paste(genus, species, sep = "_"))
return(revise_species_its)
}
taxa_filt_its$Species <- revise_species_its(taxa_filt_its$Genus, taxa_filt_its$Species)
taxa_its_df <- as.data.frame(taxa_filt_its)
taxa_its_df$sequence <- rownames(taxa_its_df)
taxa_its_df <- taxa_its_df[, c("sequence", setdiff(names(taxa_its_df), "sequence"))]
rownames(taxa_its_df) <- NULL
#Load ASV matrix now
seqtab_nochim_its <- read.table("standard_workflow/its/data/its_seqtab_nochim.out")
seqtab_nochim_its <- t(seqtab_nochim_its)
seqtab_nochim_its_df <- as.data.frame(seqtab_nochim_its)
seqtab_nochim_its_df$sequence <- rownames(seqtab_nochim_its_df)
seqtab_nochim_its_df <- seqtab_nochim_its_df[, c("sequence", setdiff(names(seqtab_nochim_its_df), "sequence"))]
rownames(seqtab_nochim_its_df) <- NULL
#Combine matrices
combined_df_its <- merge(seqtab_nochim_its_df, taxa_its_df, by = "sequence", all = TRUE)
taxonomic_cols <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Species")
bootstrap_cols <- c("boot.Kingdom", "boot.Phylum", "boot.Class", "boot.Order",
"boot.Family", "boot.Genus", "boot.Species")
dada2_tax <- mapply(function(tax, boot, level) {
paste(tax, boot, level, sep = "--")
}, combined_df_its[, taxonomic_cols], combined_df_its[, bootstrap_cols],
c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"))
combined_df_its$dada2_tax <- apply(dada2_tax, 1, paste, collapse = ";")
#Remove all other tax and boot columns
combined_df_its <- combined_df_its[, !(names(combined_df_its) %in% c(taxonomic_cols, bootstrap_cols))]
# Reorder columns to put 'taxa_bootstrap_combined' after 'sequence'
combined_df_its <- combined_df_its[, c("sequence", "dada2_tax", setdiff(names(combined_df_its), c("sequence", "dada2_tax")))]
#Add domain-100-Eukaryota; prefix before Kingdom
combined_df_its$dada2_tax <- gsub("Fungi", "Eukaryota--100--Domain;Fungi", combined_df_its$dada2_tax)
#Load metadata
samdf_its<- read.csv("standard_workflow/its/data/metadata_its.csv")
#Save file
write.csv(combined_df_its, "standard_workflow/its/data/its_abundance_matrix_combined_dada2workflow.csv")taxa_rps10<- read.table("standard_workflow/rps10/data/rps10_taxa.out", sep="\t", stringsAsFactors=F,header=T)
taxa_filt_rps10 <- taxa_rps10 %>%
mutate(across(starts_with("tax."), ~ ifelse(get(str_replace(cur_column(), "tax.", "boot.")) < 0, NA, .))) %>%
set_names(str_replace(names(.), "tax.", ""))
remove_prefix_rps10 <- function(data) {
data <- gsub("[a-z]__", "", data)
return(data)
}
taxa_filt_rps10[] <- lapply(taxa_filt_rps10, remove_prefix_rps10)
taxa_rps10_df <- as.data.frame(taxa_filt_rps10)
taxa_rps10_df$sequence <- rownames(taxa_rps10_df)
taxa_rps10_df <- taxa_rps10_df[, c("sequence", setdiff(names(taxa_rps10_df), "sequence"))]
rownames(taxa_rps10_df) <- NULL
#Load ASV matrix now
seqtab_nochim_rps10 <- read.table("standard_workflow/rps10/data/rps10_seqtab_nochim.out")
rownames(seqtab_nochim_rps10) <- gsub("__", "_", rownames(seqtab_nochim_rps10))
seqtab_nochim_rps10 <- t(seqtab_nochim_rps10)
seqtab_nochim_rps10_df <- as.data.frame(seqtab_nochim_rps10)
seqtab_nochim_rps10_df$sequence <- rownames(seqtab_nochim_rps10_df)
seqtab_nochim_rps10_df <- seqtab_nochim_rps10_df[, c("sequence", setdiff(names(seqtab_nochim_rps10_df), "sequence"))]
rownames(seqtab_nochim_rps10_df) <- NULL
#Combine matrices
combined_df_rps10 <- merge(seqtab_nochim_rps10_df, taxa_rps10_df, by = "sequence", all = TRUE)
taxonomic_cols <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Species")
bootstrap_cols <- c("boot.Kingdom", "boot.Phylum", "boot.Class", "boot.Order",
"boot.Family", "boot.Genus", "boot.Species")
dada2_tax <- mapply(function(tax, boot, level) {
paste(tax, boot, level, sep = "--")
}, combined_df_rps10[, taxonomic_cols], combined_df_rps10[, bootstrap_cols],
c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"))
combined_df_rps10$dada2_tax <- apply(dada2_tax, 1, paste, collapse = ";")
#Remove all other tax and boot columns
combined_df_rps10 <- combined_df_rps10[, !(names(combined_df_rps10) %in% c(taxonomic_cols, bootstrap_cols))]
# Reorder columns to put 'taxa_bootstrap_combined' after 'sequence'
combined_df_rps10 <- combined_df_rps10[, c("sequence", "dada2_tax", setdiff(names(combined_df_rps10), c("sequence", "dada2_tax")))]
#Add domain-100-Eukaryota; prefix before Kingdom
combined_df_rps10$dada2_tax <- gsub("Stramenopila", "Eukaryota--100--Domain;Stramenopila", combined_df_rps10$dada2_tax)
#Load metadata
samdf_rps10<- read.csv("standard_workflow/rps10/data/metadata_rps10.csv")
#Save file
write.csv(combined_df_rps10, "standard_workflow/rps10/data/rps10_abundance_matrix_combined_dada2workflow.csv")
#Load DADA2 metadata file
samdf_rps10<- read.csv("standard_workflow/rps10/data/metadata_rps10.csv")sample_cols_its <- setdiff(names(combined_df_its), c("sequence", "dada2_tax"))
sample_cols_rps10 <- setdiff(names(combined_df_rps10), c("sequence", "dada2_tax"))
if(!all(sample_cols_its == sample_cols_rps10)) {
stop("Sample columns do not match between ITS and RPS10 dataframes!")
}
abundance <- rbind(
combined_df_its[, c("sequence", "dada2_tax", sample_cols_its)], # ITS data
combined_df_rps10[, c("sequence", "dada2_tax", sample_cols_rps10)] # RPS10 data
)
write.csv(abundance, "standard_workflow/combined_data/final_combined_abundance_matrix_standard_workflow.csv")
#Let's also load metadata file-consistent with demulticoder analysis
metadata_path <- file.path("standard_workflow/combined_data/metadata.csv")
metadata <- read_csv(metadata_path)
#> Rows: 173 Columns: 9
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): sample_name, well, organism, sample_type
#> dbl (3): plate, path_conc, experiment
#> lgl (2): flooded, is_ambiguous
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(metadata)
#> # A tibble: 173 × 9
#> sample_name plate well organism flooded path_conc experiment sample_type
#> <chr> <dbl> <chr> <chr> <lgl> <dbl> <dbl> <chr>
#> 1 S1 1 A01 Cry TRUE 100 1 Sample
#> 2 S10 1 B02 Cry TRUE 1 1 Sample
#> 3 S100 2 B02 Cin TRUE 1 2 Sample
#> 4 S101 2 C02 Plu FALSE 1 2 Sample
#> 5 S102 2 D02 Cry TRUE 1 2 Sample
#> 6 S103 2 F02 Cin FALSE 1 2 Sample
#> 7 S104 2 G02 Plu FALSE 1 2 Sample
#> 8 S105 2 H02 Cry TRUE 100 2 Sample
#> 9 S106 2 A03 Plu FALSE 100 2 Sample
#> 10 S107 2 B03 Cin TRUE 100 2 Sample
#> # ℹ 163 more rows
#> # ℹ 1 more variable: is_ambiguous <lgl>its_summary_reads<-summary(track_reads_standard_wf_its)
print(its_summary_reads)
#> input filtered denoisedF denoisedR
#> Min. : 110 Min. : 5 Min. : 1 Min. : 1
#> 1st Qu.: 23642 1st Qu.:12765 1st Qu.:12472 1st Qu.:12342
#> Median : 37530 Median :22939 Median :22628 Median :22511
#> Mean : 43474 Mean :25532 Mean :25285 Mean :25155
#> 3rd Qu.: 58507 3rd Qu.:31793 3rd Qu.:31567 3rd Qu.:31446
#> Max. :137025 Max. :87247 Max. :87073 Max. :86985
#> merged nonchim
#> Min. : 0 Min. : 0
#> 1st Qu.:11864 1st Qu.:11864
#> Median :21767 Median :21767
#> Mean :24398 Mean :24307
#> 3rd Qu.:30396 3rd Qu.:29957
#> Max. :86697 Max. :86685
rps10_summary_reads<-summary(track_reads_standard_wf_rps10)
print(rps10_summary_reads)
#> input filtered denoisedF denoisedR
#> Min. : 94 Min. : 26 Min. : 7 Min. : 6
#> 1st Qu.: 38305 1st Qu.: 29337 1st Qu.: 29321 1st Qu.: 29314
#> Median : 61388 Median : 48530 Median : 48503 Median : 48497
#> Mean : 74279 Mean : 56779 Mean : 56715 Mean : 56712
#> 3rd Qu.: 95914 3rd Qu.: 75033 3rd Qu.: 74536 3rd Qu.: 74530
#> Max. :264287 Max. :219173 Max. :218786 Max. :219079
#> merged nonchim
#> Min. : 0 Min. : 0
#> 1st Qu.: 29277 1st Qu.: 29277
#> Median : 48063 Median : 48063
#> Mean : 56307 Mean : 55623
#> 3rd Qu.: 73544 3rd Qu.: 73431
#> Max. :218307 Max. :217160# Function to extract taxonomic levels and count unique entries with bootstrap filtering
# Function to extract taxonomic levels and count unique entries
analyze_taxonomy <- function(data) {
# Get taxonomy data
tax_data <- data$dada2_tax
# Split taxonomy string into components
tax_splits <- strsplit(tax_data, ";")
# Function to safely extract taxonomic level with proper rank checking
extract_tax_level <- function(tax_array, rank) {
# Find the position containing the rank
rank_pos <- grep(rank, tax_array)
if(length(rank_pos) > 0) {
# Extract and clean the taxonomy name
tax <- tax_array[rank_pos]
# Remove the rank pattern and any leading/trailing whitespace
cleaned_tax <- trimws(gsub(paste0("--\\d+--", rank), "", tax))
return(cleaned_tax)
}
return(NA)
}
# Extract each taxonomic level with proper hierarchy checking
families <- sapply(tax_splits, function(x) extract_tax_level(x, "Family"))
genera <- sapply(tax_splits, function(x) extract_tax_level(x, "Genus"))
species <- sapply(tax_splits, function(x) extract_tax_level(x, "Species"))
# Create a data frame to check hierarchy consistency
tax_df <- data.frame(
Family = families,
Genus = genera,
Species = species,
stringsAsFactors = FALSE
)
# Remove NA values before counting
families <- families[!is.na(families)]
genera <- genera[!is.na(genera)]
species <- species[!is.na(species)]
# Count unique entries
unique_counts <- list(
families = length(unique(families)),
genera = length(unique(genera)),
species = length(unique(species))
)
# Get prevalence with hierarchy checking
family_prev <- table(families)
genus_prev <- table(genera)
species_prev <- table(species)
# Sort prevalence tables
family_prev <- sort(family_prev, decreasing = TRUE)
genus_prev <- sort(genus_prev, decreasing = TRUE)
species_prev <- sort(species_prev, decreasing = TRUE)
# Print top 5 of each level for verification
cat("\nTop 5 most prevalent Families:\n")
print(head(family_prev, 5))
cat("\nTop 5 most prevalent Genera:\n")
print(head(genus_prev, 5))
cat("\nTop 5 most prevalent Species:\n")
print(head(species_prev, 5))
# Get most prevalent taxa with hierarchy verification
most_prevalent <- list(
family = if(length(family_prev) > 0) names(family_prev)[1] else "None found",
genus = if(length(genus_prev) > 0) names(genus_prev)[1] else "None found",
species = if(length(species_prev) > 0) names(species_prev)[1] else "None found"
)
return(list(
unique_counts = unique_counts,
most_prevalent = most_prevalent,
tax_df = tax_df # Return full taxonomy dataframe for inspection
))
}
# Run analysis for both datasets
if(exists("combined_df_rps10")) {
rps10_results <- analyze_taxonomy(combined_df_rps10)
}
#>
#> Top 5 most prevalent Families:
#> families
#> Pythiaceae Peronosporaceae Saprolegniaceae Lagenidiaceae Saplolegniaceae
#> 171 126 20 11 9
#>
#> Top 5 most prevalent Genera:
#> genera
#> Pythium Phytophthora Peronospora Aphanomyces Saprolegnia
#> 152 96 16 15 11
#>
#> Top 5 most prevalent Species:
#> species
#> Phytophthora_sp.kununurra Phytophthora_citrophthora Pythium_dissotocum
#> 24 19 19
#> Pythium_lutarium Phytophthora_sojae
#> 14 13
if(exists("combined_df_its")) {
its_results <- analyze_taxonomy(combined_df_its)
}
#>
#> Top 5 most prevalent Families:
#> families
#> Fungi_fam_Incertae_sedis Serendipitaceae
#> 356 201
#> Rozellomycota_fam_Incertae_sedis Hyaloscyphaceae
#> 176 157
#> Tuberaceae
#> 128
#>
#> Top 5 most prevalent Genera:
#> genera
#> Fungi_gen_Incertae_sedis Serendipita
#> 356 197
#> Rozellomycota_gen_Incertae_sedis Tuber
#> 176 128
#> Hyaloscypha
#> 94
#>
#> Top 5 most prevalent Species:
#> species
#> NA Tuber_pseudobrumale
#> 1315 128
#> Pezoloma_ericae Hyaloscypha_finlandica
#> 31 20
#> Clathrosphaerina_zalewskii
#> 19First we will configure the combined taxmap object
#load reference database
rps10_database <- read_fasta("demulticoder/data/rps10_reference_db.fa")
innoc_species <- c(Cin = "Phytophthora_cinnamomi", Plu = "Phytophthora_plurivora", Cry = "Pythium_cryptoirregulare")
innoc_regex <- paste0('(', paste0(innoc_species, collapse = '|'), ')')
innoc_seqs <- rps10_database[grepl(names(rps10_database), pattern = innoc_regex)]
innoc_seqs <- innoc_seqs[! duplicated(innoc_seqs)]
names(innoc_seqs) <- str_match(names(innoc_seqs), pattern = innoc_regex)[, 2]
iupac_match <- function(asv_chars, ref_chars) {
map2_lgl(asv_chars, ref_chars, function(asv, ref) grepl(asv, pattern = paste0('[', IUPAC_CODE_MAP[ref], ']+')))
}
# Count number of mismatches in an alignment, allowing for IUPAC codes in reference
align_mismatch <- function(alignment) {
asv_chars <- strsplit(as.character(alignment@pattern), '')[[1]]
ref_chars <- strsplit(as.character(alignment@subject), '')[[1]]
sum(! iupac_match(asv_chars, ref_chars))
}
# Align each sequence to each asv and make table of results"
aligned_data_path <- file.path("standard_workflow/combined_data", "infection_aligned_data.rds")
if (file.exists(aligned_data_path)) {
aligned_data <- readRDS(aligned_data_path)
} else {
aligned_data <- future_map_dfr(seq_along(innoc_seqs), function(i) {
aligned <- lapply(abundance$sequence, function(asv) pairwiseAlignment(pattern = asv, subject = innoc_seqs[i], type = 'global-local'))
print(paste("Processing species:", names(innoc_seqs)[i]))
tibble(
species = names(innoc_seqs)[i],
align_len = map_dbl(aligned, nchar),
mismatch = map_dbl(aligned, align_mismatch),
pid = (align_len - mismatch) / align_len,
asv_seq = abundance$sequence,
ref_seq = innoc_seqs[i],
alignment = aligned
)
})
saveRDS(aligned_data, file = aligned_data_path)
}
# Convert ASV abundances to proportions for use with matches
abundance_prop <- abundance
abundance_prop[metadata$sample_name] <- lapply(abundance_prop[metadata$sample_name], function(counts) counts/sum(counts))
innoculum_asv_mismatch_threshold <-1
# Sum the read counts of ASVs representing the same species used for inoculation
infection_data <- aligned_data %>%
filter(mismatch <= innoculum_asv_mismatch_threshold) %>%
left_join(abundance_prop, by = c(asv_seq = "sequence")) %>%
group_by(asv_seq) %>% slice_min(mismatch, with_ties = FALSE) # Only consider the best match per ASVmetadata <-samdf_its
## Update abundance matrix and metadata with results
inoculum_asv_key <- setNames(infection_data$species, infection_data$asv_seq)
abundance <- mutate(abundance, is_inoculum = sequence %in% infection_data$asv_seq, inoculum = inoculum_asv_key[sequence], .after = "dada2_tax")
write_csv(abundance, file.path('standard_workflow/combined_data', 'abundance_with_infection_data.csv'))
#Let's append metadata table with more info
species_name_key <- c(Phytophthora_cinnamomi = "cin_prop", Phytophthora_plurivora = "plu_prop", Pythium_cryptoirregulare = "cry_prop")
sample_infection_data <- infection_data %>%
group_by(species) %>% summarise_at(metadata$sample_name, sum) %>% # Sum per inoculum species
gather(key = "sample_name", value = "read_prop", !!! metadata$sample_name) %>%
mutate(species = species_name_key[species]) %>%
mutate(read_prop = ifelse(is.nan(read_prop), 0, read_prop)) %>%
tidyr::spread(key = 'species', value = 'read_prop')
metadata <- metadata %>%
select(sample_name, plate, well, organism, flooded, path_conc, experiment, sample_type, is_ambiguous) %>% # Avoid duplicating columns from previous runs
left_join(sample_infection_data, by = "sample_name")
# Get expected inoculum proportion and non-target proportion
metadata$expected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
if (metadata$sample_type[i] == "Mock community") {
return(NA)
} else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(NA)
} else {
prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
return(metadata[[i, prop_col]])
}
})
metadata$unexpected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
prop_cols <- c("cin_prop", "cry_prop", "plu_prop")
if (metadata$sample_type[i] == "Mock community" || metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(sum(metadata[i, prop_cols]))
} else {
expected_prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
prop_cols <- prop_cols[! prop_cols %in% expected_prop_col]
return(sum(metadata[i, prop_cols]))
}
})
# Check if expected inoculum was found
metadata$expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
if (metadata$sample_type[i] == "Mock community") {
return(NA)
} else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(all_innoc == 0)
} else {
prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
return(metadata[[i, prop_col]] > 0)
}
})
# Check if ONLY expected inoculum was found
metadata$only_expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
if (metadata$sample_type[i] == "Mock community") {
return(NA)
} else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(all_innoc == 0)
} else {
prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
return(metadata[[i, prop_col]] > 0 && metadata[[i, prop_col]] == all_innoc)
}
})
# Look at number of samples with expected inoculum
table(metadata$expected_innoc)
#>
#> FALSE TRUE
#> 88 81
table(metadata$only_expected_innoc)
#>
#> FALSE TRUE
#> 107 62
metadata$valid_inoc <- (is.na(metadata$expected_innoc_prop) | metadata$expected_innoc_prop > 0.0001) & metadata$unexpected_innoc_prop < 0.0001
map_dbl(split(metadata$valid_inoc, metadata$organism), sum)
#> Cin Control Cry Plu
#> 26 9 16 8
# Save new metadata
write_csv(metadata, file.path('standard_workflow/combined_data', 'metadata_with_infection_data.csv'))# Get per-sample proportions of reads
species_name_key <- c(Phytophthora_cinnamomi = "cin_prop", Phytophthora_plurivora = "plu_prop", Pythium_cryptoirregulare = "cry_prop")
sample_infection_data <- infection_data %>%
group_by(species) %>% summarise_at(metadata$sample_name, sum) %>% # Sum per inoculum species
tidyr::gather(key = "sample_name", value = "read_prop", !!! metadata$sample_name) %>%
mutate(species = species_name_key[species]) %>%
mutate(read_prop = ifelse(is.nan(read_prop), 0, read_prop)) %>%
tidyr::spread(key = 'species', value = 'read_prop')
metadata <- metadata %>%
select(sample_name, plate, well, organism, flooded, path_conc, experiment, sample_type, is_ambiguous) %>% # Avoid duplicating columns from previous runs
left_join(sample_infection_data, by = "sample_name")
# Get expected inoculum proportion and non-target proportion
metadata$expected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
if (metadata$sample_type[i] == "Mock community") {
return(NA)
} else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(NA)
} else {
prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
return(metadata[[i, prop_col]])
}
})
metadata$unexpected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
prop_cols <- c("cin_prop", "cry_prop", "plu_prop")
if (metadata$sample_type[i] == "Mock community" || metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(sum(metadata[i, prop_cols]))
} else {
expected_prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
prop_cols <- prop_cols[! prop_cols %in% expected_prop_col]
return(sum(metadata[i, prop_cols]))
}
})
# Check if expected inoculum was found
metadata$expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
if (metadata$sample_type[i] == "Mock community") {
return(NA)
} else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(all_innoc == 0)
} else {
prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
return(metadata[[i, prop_col]] > 0)
}
})
# Check if ONLY expected inoculum was found
metadata$only_expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
if (metadata$sample_type[i] == "Mock community") {
return(NA)
} else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
return(all_innoc == 0)
} else {
prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
return(metadata[[i, prop_col]] > 0 && metadata[[i, prop_col]] == all_innoc)
}
})
# Look at number of samples with expected inoculum
table(metadata$expected_innoc)
#>
#> FALSE TRUE
#> 88 81
table(metadata$only_expected_innoc)
#>
#> FALSE TRUE
#> 107 62
metadata$valid_inoc <- (is.na(metadata$expected_innoc_prop) | metadata$expected_innoc_prop > 0.0001) & metadata$unexpected_innoc_prop < 0.0001
map_dbl(split(metadata$valid_inoc, metadata$organism), sum)
#> Cin Control Cry Plu
#> 26 9 16 8
# Save new metadata
write_csv(metadata, file.path('standard_workflow/combined_data', 'metadata_with_infection_data.csv'))metadata <- filter(metadata, ! is_ambiguous)
abundance <- filter(abundance, ! is_inoculum)
#Filter out mock community samples and also any mock community samples as well
metadata <- filter(metadata, sample_type != "Mock community" & sample_type != "Negative control")
obj <- parse_tax_data(abundance, class_cols = 'dada2_tax', class_sep = ';',
class_regex = '^(.+)--(.+)--(.+)$',
class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj$data) <- c('abund', 'score')
obj <- transmute_obs(obj, 'score', sequence = sequence[input_index], boot = boot, rank = rank)
# For diversity calculations, we'll use proportions of read depth
# We'll set any proportion to 0 that is less than the inverse of the read count of the non-control sample with the fewest reads.
# This should account for unequal sample read depth without the randomness of rarefaction.
metadata$raw_count <- colSums(obj$data$abund[, metadata$sample_name])
lowest_count <- min(metadata$raw_count[! is.na(metadata$organism)])
lowest_count
#> [1] 12661
obj$data$prop <- calc_obs_props(obj, data = 'abund', cols = metadata$sample_name)
#> Calculating proportions from counts for 162 columns for 2697 observations.
obj$data$prop <- zero_low_counts(obj, data = 'prop', min_count = 1 / lowest_count, cols = metadata$sample_name)
#> Zeroing 2180 of 436914 counts less than 7.89827027880894e-05.
obj$data$prop
#> # A tibble: 2,697 × 163
#> taxon_id S1 S10 S100 S101 S102 S103 S104 S105
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 bfz 0 0 0 0 0 0 0 0
#> 2 bga 0 0 0 0 0 0 0 0
#> 3 bgb 0 0 0 0 0 0 0 0
#> 4 bgc 0 0 0 0 0 0 0 0
#> 5 bgc 0 0 0 0 0 0 0.00687 0
#> 6 bgd 0 0 0 0.000587 0 0 0 0.000555
#> 7 bge 0.000370 0.00128 0.000180 0.000681 0 0 0 0
#> 8 bgd 0 0 0 0 0 0.000911 0.000634 0
#> 9 bgf 0 0 0 0 0 0 0 0
#> 10 bgf 0 0 0 0 0 0 0 0
#> # ℹ 2,687 more rows
#> # ℹ 154 more variables: S106 <dbl>, S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> # S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>, S114 <dbl>,
#> # S115 <dbl>, S116 <dbl>, S117 <dbl>, S118 <dbl>, S119 <dbl>, S12 <dbl>,
#> # S120 <dbl>, S121 <dbl>, S122 <dbl>, S123 <dbl>, S124 <dbl>, S125 <dbl>,
#> # S126 <dbl>, S127 <dbl>, S128 <dbl>, S129 <dbl>, S13 <dbl>, S130 <dbl>,
#> # S131 <dbl>, S132 <dbl>, S133 <dbl>, S134 <dbl>, S135 <dbl>, S136 <dbl>, …
obj$data$prop[metadata$sample_name] <- map(metadata$sample_name, function(id) {
out <- obj$data$prop[[id]]
out[is.na(out) | is.nan(out)] <- 0
out
})
obj$data$prop
#> # A tibble: 2,697 × 163
#> taxon_id S1 S10 S100 S101 S102 S103 S104 S105
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 bfz 0 0 0 0 0 0 0 0
#> 2 bga 0 0 0 0 0 0 0 0
#> 3 bgb 0 0 0 0 0 0 0 0
#> 4 bgc 0 0 0 0 0 0 0 0
#> 5 bgc 0 0 0 0 0 0 0.00687 0
#> 6 bgd 0 0 0 0.000587 0 0 0 0.000555
#> 7 bge 0.000370 0.00128 0.000180 0.000681 0 0 0 0
#> 8 bgd 0 0 0 0 0 0.000911 0.000634 0
#> 9 bgf 0 0 0 0 0 0 0 0
#> 10 bgf 0 0 0 0 0 0 0 0
#> # ℹ 2,687 more rows
#> # ℹ 154 more variables: S106 <dbl>, S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> # S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>, S114 <dbl>,
#> # S115 <dbl>, S116 <dbl>, S117 <dbl>, S118 <dbl>, S119 <dbl>, S12 <dbl>,
#> # S120 <dbl>, S121 <dbl>, S122 <dbl>, S123 <dbl>, S124 <dbl>, S125 <dbl>,
#> # S126 <dbl>, S127 <dbl>, S128 <dbl>, S129 <dbl>, S13 <dbl>, S130 <dbl>,
#> # S131 <dbl>, S132 <dbl>, S133 <dbl>, S134 <dbl>, S135 <dbl>, S136 <dbl>, …
#Let's modify the metadata sheet
metadata <- metadata %>%
mutate(organism = fct_relevel(ordered(organism), "Control", "Cin", "Plu", "Cry"))abund_table <- obj$data$abund[metadata$sample_name]
metadata$richness <- vegan::specnumber(abund_table, MARGIN = 2)
metadata$shannon <- vegan::diversity(abund_table, MARGIN = 2, index = "shannon")
metadata$invsimpson <- vegan::diversity(abund_table, MARGIN = 2, index = "invsimpson")
write.csv(metadata, file.path("standard_workflow", "combined_results/alpha_diversity.csv"))
print(metadata)
#> sample_name plate well organism flooded path_conc experiment sample_type
#> 1 S1 1 A01 Cry TRUE 100 1 Sample
#> 2 S10 1 B02 Cry TRUE 1 1 Sample
#> 3 S100 2 B02 Cin TRUE 1 2 Sample
#> 4 S101 2 C02 Plu FALSE 1 2 Sample
#> 5 S102 2 D02 Cry TRUE 1 2 Sample
#> 6 S103 2 F02 Cin FALSE 1 2 Sample
#> 7 S104 2 G02 Plu FALSE 1 2 Sample
#> 8 S105 2 H02 Cry TRUE 100 2 Sample
#> 9 S106 2 A03 Plu FALSE 100 2 Sample
#> 10 S107 2 B03 Cin TRUE 100 2 Sample
#> 11 S108 2 C03 Control TRUE 0 2 Sample
#> 12 S109 2 D03 Cin TRUE 1 2 Sample
#> 13 S11 1 C02 Plu FALSE 1 1 Sample
#> 14 S110 2 E03 Cry TRUE 100 2 Sample
#> 15 S111 2 F03 Cin FALSE 100 2 Sample
#> 16 S112 2 G03 Control TRUE 0 2 Sample
#> 17 S113 2 H03 Cry FALSE 1 2 Sample
#> 18 S114 2 A04 Plu FALSE 100 2 Sample
#> 19 S115 2 B04 Control FALSE 0 2 Sample
#> 20 S116 2 D04 Plu TRUE 100 2 Sample
#> 21 S117 2 E04 Cry TRUE 1 2 Sample
#> 22 S118 2 F04 Cry FALSE 100 2 Sample
#> 23 S119 2 G04 Control TRUE 0 2 Sample
#> 24 S12 1 D02 Plu TRUE 1 1 Sample
#> 25 S120 2 H04 Cry FALSE 1 2 Sample
#> 26 S121 2 A05 Cin TRUE 100 2 Sample
#> 27 S122 2 B05 Cry FALSE 100 2 Sample
#> 28 S123 2 C05 Plu TRUE 1 2 Sample
#> 29 S124 2 D05 Cry TRUE 1 2 Sample
#> 30 S125 2 E05 Plu FALSE 1 2 Sample
#> 31 S126 2 F05 Plu TRUE 1 2 Sample
#> 32 S127 2 G05 Plu FALSE 100 2 Sample
#> 33 S128 2 H05 Plu TRUE 100 2 Sample
#> 34 S129 2 A06 Control TRUE 0 2 Sample
#> 35 S13 1 E02 Control TRUE 0 1 Sample
#> 36 S130 2 B06 Cin TRUE 100 2 Sample
#> 37 S131 2 C06 Cin TRUE 1 2 Sample
#> 38 S132 2 D06 Cry TRUE 1 2 Sample
#> 39 S133 2 E06 Cin FALSE 1 2 Sample
#> 40 S134 2 F06 Cin FALSE 100 2 Sample
#> 41 S135 2 G06 Cry FALSE 100 2 Sample
#> 42 S136 2 H06 Plu TRUE 1 2 Sample
#> 43 S137 2 B07 Cry TRUE 100 2 Sample
#> 44 S138 2 C07 Plu TRUE 100 2 Sample
#> 45 S139 2 D07 Cin TRUE 100 2 Sample
#> 46 S14 1 F02 Cin TRUE 1 1 Sample
#> 47 S140 2 E07 Plu FALSE 1 2 Sample
#> 48 S141 2 F07 Cry TRUE 100 2 Sample
#> 49 S142 2 G07 Control FALSE 0 2 Sample
#> 50 S143 2 H07 Cry FALSE 1 2 Sample
#> 51 S144 2 A08 Control FALSE 0 2 Sample
#> 52 S145 2 B08 Plu FALSE 1 2 Sample
#> 53 S146 2 C08 Cin FALSE 100 2 Sample
#> 54 S147 2 D08 Cry FALSE 100 2 Sample
#> 55 S148 2 E08 Cry FALSE 1 2 Sample
#> 56 S149 2 F08 Plu FALSE 100 2 Sample
#> 57 S15 1 H02 Cin FALSE 100 1 Sample
#> 58 S150 2 G08 Control TRUE 0 2 Sample
#> 59 S151 2 H08 Cin TRUE 1 2 Sample
#> 60 S152 2 A09 Cry TRUE 1 2 Sample
#> 61 S153 2 B09 Plu TRUE 1 2 Sample
#> 62 S154 2 C09 Cin FALSE 100 2 Sample
#> 63 S156 2 D09 Cin FALSE 1 2 Sample
#> 64 S157 2 E09 Cry FALSE 1 2 Sample
#> 65 S158 2 F09 Cry TRUE 1 2 Sample
#> 66 S159 2 H09 Plu FALSE 100 2 Sample
#> 67 S16 1 A03 Cry FALSE 100 1 Sample
#> 68 S160 2 A10 Control FALSE 0 2 Sample
#> 69 S161 2 B10 Cry FALSE 100 2 Sample
#> 70 S162 2 C10 Cin FALSE 1 2 Sample
#> 71 S163 2 D10 Plu TRUE 100 2 Sample
#> 72 S164 2 E10 Plu TRUE 1 2 Sample
#> 73 S165 2 F10 Cin FALSE 1 2 Sample
#> 74 S166 2 G10 Plu FALSE 100 2 Sample
#> 75 S167 2 H10 Plu TRUE 100 2 Sample
#> 76 S168 2 A11 Cin TRUE 1 2 Sample
#> 77 S169 2 B11 Cin FALSE 100 2 Sample
#> 78 S17 1 B03 Cry TRUE 100 1 Sample
#> 79 S18 1 C03 Control TRUE 0 1 Sample
#> 80 S2 1 B01 Cin FALSE 1 1 Sample
#> 81 S20 1 E03 Plu FALSE 100 1 Sample
#> 82 S21 1 F03 Cry FALSE 1 1 Sample
#> 83 S22 1 G03 Cry TRUE 100 1 Sample
#> 84 S23 1 H03 Plu TRUE 100 1 Sample
#> 85 S24 1 A04 Cin FALSE 100 1 Sample
#> 86 S25 1 B04 Control FALSE 0 1 Sample
#> 87 S26 1 C04 Control FALSE 0 1 Sample
#> 88 S27 1 D04 Cin FALSE 100 1 Sample
#> 89 S28 1 E04 Cin FALSE 100 1 Sample
#> 90 S29 1 F04 Cin FALSE 100 1 Sample
#> 91 S3 1 C01 Plu TRUE 1 1 Sample
#> 92 S31 1 H04 Plu FALSE 100 1 Sample
#> 93 S32 1 A05 Cry FALSE 100 1 Sample
#> 94 S33 1 B05 Control FALSE 0 1 Sample
#> 95 S34 1 C05 Plu FALSE 100 1 Sample
#> 96 S35 1 E05 Control TRUE 0 1 Sample
#> 97 S36 1 F05 Plu TRUE 100 1 Sample
#> 98 S37 1 G05 Cry TRUE 100 1 Sample
#> 99 S38 1 H05 Cry TRUE 1 1 Sample
#> 100 S39 1 A06 Cin TRUE 1 1 Sample
#> 101 S4 1 D01 Plu TRUE 1 1 Sample
#> 102 S40 1 B06 Cry FALSE 1 1 Sample
#> 103 S41 1 C06 Cry FALSE 100 1 Sample
#> 104 S42 1 D06 Plu TRUE 1 1 Sample
#> 105 S43 1 E06 Cry FALSE 1 1 Sample
#> 106 S45 1 G06 Cin TRUE 100 1 Sample
#> 107 S46 1 H06 Cin TRUE 1 1 Sample
#> 108 S47 1 A07 Plu TRUE 100 1 Sample
#> 109 S48 1 B07 Cin TRUE 100 1 Sample
#> 110 S49 1 C07 Cin TRUE 1 1 Sample
#> 111 S5 1 E01 Cin FALSE 1 1 Sample
#> 112 S50 1 D07 Control TRUE 0 1 Sample
#> 113 S51 1 E07 Cry FALSE 1 1 Sample
#> 114 S52 1 F07 Plu TRUE 1 1 Sample
#> 115 S53 1 G07 Plu FALSE 100 1 Sample
#> 116 S54 1 A08 Plu FALSE 100 1 Sample
#> 117 S55 1 B08 Plu FALSE 100 1 Sample
#> 118 S56 1 C08 Cry TRUE 100 1 Sample
#> 119 S57 1 D08 Cry FALSE 1 1 Sample
#> 120 S58 1 E08 Cin FALSE 1 1 Sample
#> 121 S59 1 F08 Cry FALSE 1 1 Sample
#> 122 S6 1 F01 Cry TRUE 1 1 Sample
#> 123 S60 1 G08 Cin FALSE 1 1 Sample
#> 124 S61 1 H08 Cin FALSE 1 1 Sample
#> 125 S62 1 A09 Cry TRUE 1 1 Sample
#> 126 S63 1 B09 Cin TRUE 1 1 Sample
#> 127 S64 1 C09 Control FALSE 0 1 Sample
#> 128 S65 1 D09 Plu TRUE 100 1 Sample
#> 129 S66 1 E09 Cin TRUE 1 1 Sample
#> 130 S68 1 G09 Plu FALSE 1 1 Sample
#> 131 S69 1 H09 Plu FALSE 1 1 Sample
#> 132 S7 1 G01 Cry TRUE 1 1 Sample
#> 133 S70 1 A10 Plu FALSE 1 1 Sample
#> 134 S72 1 D10 Plu FALSE 1 1 Sample
#> 135 S73 1 E10 Control FALSE 0 1 Sample
#> 136 S74 1 F10 Control TRUE 0 1 Sample
#> 137 S76 1 H10 Cin FALSE 1 1 Sample
#> 138 S77 1 A11 Plu FALSE 100 1 Sample
#> 139 S78 1 B11 Control FALSE 0 1 Sample
#> 140 S79 1 C11 Control TRUE 0 1 Sample
#> 141 S8 1 H01 Cin TRUE 100 1 Sample
#> 142 S80 1 D11 Cry TRUE 1 1 Sample
#> 143 S81 1 E11 Cin FALSE 100 1 Sample
#> 144 S82 1 F11 Cry TRUE 100 1 Sample
#> 145 S83 1 G11 Cry FALSE 100 1 Sample
#> 146 S84 1 H11 Cin TRUE 100 1 Sample
#> 147 S85 1 A12 Plu FALSE 1 2 Sample
#> 148 S86 1 B12 Plu TRUE 1 2 Sample
#> 149 S87 1 C12 Cry FALSE 100 2 Sample
#> 150 S88 1 D12 Cin TRUE 100 2 Sample
#> 151 S89 1 E12 Cry TRUE 100 2 Sample
#> 152 S9 1 A02 Cry FALSE 100 1 Sample
#> 153 S90 1 F12 Cry TRUE 100 2 Sample
#> 154 S91 2 A01 Control FALSE 0 2 Sample
#> 155 S92 2 B01 Cry FALSE 1 2 Sample
#> 156 S93 2 C01 Cin FALSE 100 2 Sample
#> 157 S94 2 D01 Control FALSE 0 2 Sample
#> 158 S95 2 E01 Control TRUE 0 2 Sample
#> 159 S96 2 F01 Cin TRUE 1 2 Sample
#> 160 S97 2 G01 Cin TRUE 100 2 Sample
#> 161 S98 2 H01 Plu TRUE 100 2 Sample
#> 162 S99 2 A02 Cin FALSE 1 2 Sample
#> is_ambiguous cin_prop cry_prop plu_prop expected_innoc_prop
#> 1 FALSE 0.000000e+00 0.0032119351 0.000000000 0.0032119351
#> 2 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 3 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 4 FALSE 0.000000e+00 0.0681409604 0.009248630 0.0092486300
#> 5 FALSE 2.104353e-04 0.0000000000 0.000000000 0.0000000000
#> 6 FALSE 2.356816e-03 0.0000000000 0.000000000 0.0023568163
#> 7 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 8 FALSE 0.000000e+00 0.0137158699 0.000000000 0.0137158699
#> 9 FALSE 0.000000e+00 0.0000000000 0.012172750 0.0121727499
#> 10 FALSE 8.617377e-01 0.0000000000 0.000000000 0.8617376934
#> 11 FALSE 1.903449e-03 0.0000000000 0.000000000 NA
#> 12 FALSE 8.884608e-02 0.0000000000 0.000000000 0.0888460788
#> 13 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 14 FALSE 1.485791e-04 0.0022756064 0.000000000 0.0022756064
#> 15 FALSE 6.575360e-01 0.0402978596 0.000000000 0.6575360117
#> 16 FALSE 5.599046e-04 0.0010161232 0.000000000 NA
#> 17 FALSE 2.965214e-04 0.0000000000 0.000000000 0.0000000000
#> 18 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 19 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 20 FALSE 2.239594e-04 0.0000000000 0.016434353 0.0164343532
#> 21 FALSE 0.000000e+00 0.0006071162 0.000000000 0.0006071162
#> 22 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 23 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 24 FALSE 0.000000e+00 0.0341041618 0.000000000 0.0000000000
#> 25 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 26 FALSE 8.058991e-01 0.0000000000 0.000000000 0.8058990655
#> 27 FALSE 8.216578e-04 0.0000000000 0.000000000 0.0000000000
#> 28 FALSE 7.052493e-04 0.0000000000 0.000000000 0.0000000000
#> 29 FALSE 8.237596e-04 0.1323752225 0.000000000 0.1323752225
#> 30 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 31 FALSE 0.000000e+00 0.0096353017 0.000000000 0.0000000000
#> 32 FALSE 4.516005e-03 0.0000000000 0.000000000 0.0000000000
#> 33 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 34 FALSE 1.260323e-04 0.0022420484 0.000000000 NA
#> 35 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 36 FALSE 8.338906e-01 0.0000000000 0.000000000 0.8338905565
#> 37 FALSE 3.360055e-01 0.0000000000 0.000000000 0.3360055474
#> 38 FALSE 1.061727e-03 0.0014822134 0.000000000 0.0014822134
#> 39 FALSE 5.096104e-01 0.0038817338 0.000000000 0.5096103556
#> 40 FALSE 8.038871e-01 0.0598278485 0.000000000 0.8038871094
#> 41 FALSE 5.952775e-04 0.0000000000 0.000000000 0.0000000000
#> 42 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 43 FALSE 3.314016e-04 0.0213619712 0.000000000 0.0213619712
#> 44 FALSE 2.953856e-04 0.0000000000 0.009667891 0.0096678908
#> 45 FALSE 6.821178e-01 0.0000000000 0.000000000 0.6821178209
#> 46 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 47 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 48 FALSE 0.000000e+00 0.0190983882 0.000000000 0.0190983882
#> 49 FALSE 3.565927e-04 0.0000000000 0.000000000 NA
#> 50 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 51 FALSE 1.526441e-04 0.0000000000 0.000000000 NA
#> 52 FALSE 9.834957e-04 0.0000000000 0.000000000 0.0000000000
#> 53 FALSE 8.702460e-01 0.0000000000 0.000000000 0.8702459781
#> 54 FALSE 1.032316e-03 0.0039198085 0.000000000 0.0039198085
#> 55 FALSE 4.233806e-04 0.0000000000 0.000000000 0.0000000000
#> 56 FALSE 3.401466e-04 0.0000000000 0.000000000 0.0000000000
#> 57 FALSE 6.002560e-01 0.0000000000 0.000000000 0.6002559521
#> 58 FALSE 0.000000e+00 0.0096928783 0.000000000 NA
#> 59 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 60 FALSE 9.091426e-05 0.1694300813 0.000000000 0.1694300813
#> 61 FALSE 2.342743e-03 0.0000000000 0.000000000 0.0000000000
#> 62 FALSE 7.245918e-01 0.0000000000 0.000000000 0.7245917650
#> 63 FALSE 3.705783e-01 0.0000000000 0.000000000 0.3705782938
#> 64 FALSE 4.948334e-04 0.0000000000 0.000000000 0.0000000000
#> 65 FALSE 5.876028e-04 0.0000000000 0.000000000 0.0000000000
#> 66 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 67 FALSE 0.000000e+00 0.5248189051 0.000000000 0.5248189051
#> 68 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 69 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 70 FALSE 2.646929e-01 0.0000000000 0.000000000 0.2646929245
#> 71 FALSE 4.594005e-04 0.0387475594 0.001981165 0.0019811646
#> 72 FALSE 9.639821e-04 0.0000000000 0.000000000 0.0000000000
#> 73 FALSE 0.000000e+00 0.0999563628 0.000000000 0.0000000000
#> 74 FALSE 0.000000e+00 0.0000000000 0.002561376 0.0025613757
#> 75 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 76 FALSE 5.927190e-04 0.0000000000 0.000000000 0.0005927190
#> 77 FALSE 7.771796e-01 0.0000000000 0.000000000 0.7771795912
#> 78 FALSE 0.000000e+00 0.1070520397 0.000000000 0.1070520397
#> 79 FALSE 0.000000e+00 0.1214449744 0.000000000 NA
#> 80 FALSE 0.000000e+00 0.2140226089 0.000000000 0.0000000000
#> 81 FALSE 0.000000e+00 0.0000000000 0.025472540 0.0254725400
#> 82 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 83 FALSE 0.000000e+00 0.1758458754 0.000000000 0.1758458754
#> 84 FALSE 0.000000e+00 0.0000000000 0.001552998 0.0015529984
#> 85 FALSE 3.306350e-01 0.0000000000 0.000000000 0.3306349557
#> 86 FALSE 3.615166e-04 0.0000000000 0.000000000 NA
#> 87 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 88 FALSE 2.491744e-01 0.0000000000 0.000000000 0.2491743614
#> 89 FALSE 1.839319e-01 0.0000000000 0.000000000 0.1839319005
#> 90 FALSE 5.419236e-01 0.0000000000 0.000000000 0.5419235512
#> 91 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 92 FALSE 2.819681e-04 0.0000000000 0.000000000 0.0000000000
#> 93 FALSE 0.000000e+00 0.0703178991 0.000000000 0.0703178991
#> 94 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 95 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 96 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 97 FALSE 0.000000e+00 0.0183459671 0.000000000 0.0000000000
#> 98 FALSE 0.000000e+00 0.0128762783 0.000000000 0.0128762783
#> 99 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 100 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 101 FALSE 4.498749e-04 0.0122006082 0.000000000 0.0000000000
#> 102 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 103 FALSE 0.000000e+00 0.1319423968 0.000000000 0.1319423968
#> 104 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 105 FALSE 0.000000e+00 0.1122069523 0.000000000 0.1122069523
#> 106 FALSE 8.016574e-01 0.0000000000 0.000000000 0.8016574239
#> 107 FALSE 2.072690e-04 0.0256647850 0.000000000 0.0002072690
#> 108 FALSE 0.000000e+00 0.0000000000 0.005374538 0.0053745381
#> 109 FALSE 7.775173e-01 0.0379334258 0.000000000 0.7775173370
#> 110 FALSE 0.000000e+00 0.5405549029 0.000000000 0.0000000000
#> 111 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 112 FALSE 1.805293e-04 0.0000000000 0.000000000 NA
#> 113 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 114 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 115 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 116 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 117 FALSE 0.000000e+00 0.0000000000 0.002114817 0.0021148169
#> 118 FALSE 0.000000e+00 0.0084429179 0.000000000 0.0084429179
#> 119 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 120 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 121 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 122 FALSE 2.324680e-04 0.0000000000 0.000000000 0.0000000000
#> 123 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 124 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 125 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 126 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 127 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 128 FALSE 2.753304e-04 0.0053383505 0.001514317 0.0015143172
#> 129 FALSE 3.134169e-02 0.0017392333 0.000000000 0.0313416943
#> 130 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 131 FALSE 5.528025e-04 0.0000000000 0.000000000 0.0000000000
#> 132 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 133 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 134 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 135 FALSE 4.605775e-03 0.0000000000 0.000000000 NA
#> 136 FALSE 0.000000e+00 0.1818410663 0.000000000 NA
#> 137 FALSE 4.535007e-03 0.0753929304 0.000000000 0.0045350065
#> 138 FALSE 0.000000e+00 0.0000000000 0.029522111 0.0295221108
#> 139 FALSE 1.419265e-04 0.0000000000 0.000000000 NA
#> 140 FALSE 0.000000e+00 0.0008006049 0.000000000 NA
#> 141 FALSE 8.299622e-01 0.0000000000 0.000000000 0.8299622226
#> 142 FALSE 1.934727e-04 0.0000000000 0.000000000 0.0000000000
#> 143 FALSE 4.375981e-01 0.0000000000 0.000000000 0.4375981106
#> 144 FALSE 0.000000e+00 0.0051668883 0.000000000 0.0051668883
#> 145 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 146 FALSE 6.952040e-01 0.0000000000 0.000000000 0.6952040387
#> 147 FALSE 0.000000e+00 0.0045444976 0.000000000 0.0000000000
#> 148 FALSE 1.398813e-04 0.1220697775 0.000000000 0.0000000000
#> 149 FALSE 0.000000e+00 0.0015766494 0.000000000 0.0015766494
#> 150 FALSE 8.364097e-01 0.0000000000 0.000000000 0.8364097093
#> 151 FALSE 0.000000e+00 0.0062249850 0.000000000 0.0062249850
#> 152 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> 153 FALSE 3.433582e-04 0.0202405268 0.000000000 0.0202405268
#> 154 FALSE 3.666286e-04 0.0000000000 0.000000000 NA
#> 155 FALSE 6.114546e-04 0.0000000000 0.000000000 0.0000000000
#> 156 FALSE 8.525916e-01 0.0000000000 0.000000000 0.8525915810
#> 157 FALSE 1.288285e-03 0.0000000000 0.000000000 NA
#> 158 FALSE 0.000000e+00 0.0000000000 0.000000000 NA
#> 159 FALSE 7.127968e-02 0.0000000000 0.000000000 0.0712796768
#> 160 FALSE 8.482561e-01 0.0000000000 0.000000000 0.8482560850
#> 161 FALSE 0.000000e+00 0.0000000000 0.330256227 0.3302562274
#> 162 FALSE 0.000000e+00 0.0000000000 0.000000000 0.0000000000
#> unexpected_innoc_prop expected_innoc only_expected_innoc valid_inoc
#> 1 0.000000e+00 TRUE TRUE TRUE
#> 2 0.000000e+00 FALSE FALSE FALSE
#> 3 0.000000e+00 FALSE FALSE FALSE
#> 4 6.814096e-02 TRUE FALSE FALSE
#> 5 2.104353e-04 FALSE FALSE FALSE
#> 6 0.000000e+00 TRUE TRUE TRUE
#> 7 0.000000e+00 FALSE FALSE FALSE
#> 8 0.000000e+00 TRUE TRUE TRUE
#> 9 0.000000e+00 TRUE TRUE TRUE
#> 10 0.000000e+00 TRUE TRUE TRUE
#> 11 1.903449e-03 FALSE FALSE FALSE
#> 12 0.000000e+00 TRUE TRUE TRUE
#> 13 0.000000e+00 FALSE FALSE FALSE
#> 14 1.485791e-04 TRUE FALSE FALSE
#> 15 4.029786e-02 TRUE FALSE FALSE
#> 16 1.576028e-03 FALSE FALSE FALSE
#> 17 2.965214e-04 FALSE FALSE FALSE
#> 18 0.000000e+00 FALSE FALSE FALSE
#> 19 0.000000e+00 TRUE TRUE TRUE
#> 20 2.239594e-04 TRUE FALSE FALSE
#> 21 0.000000e+00 TRUE TRUE TRUE
#> 22 0.000000e+00 FALSE FALSE FALSE
#> 23 0.000000e+00 TRUE TRUE TRUE
#> 24 3.410416e-02 FALSE FALSE FALSE
#> 25 0.000000e+00 FALSE FALSE FALSE
#> 26 0.000000e+00 TRUE TRUE TRUE
#> 27 8.216578e-04 FALSE FALSE FALSE
#> 28 7.052493e-04 FALSE FALSE FALSE
#> 29 8.237596e-04 TRUE FALSE FALSE
#> 30 0.000000e+00 FALSE FALSE FALSE
#> 31 9.635302e-03 FALSE FALSE FALSE
#> 32 4.516005e-03 FALSE FALSE FALSE
#> 33 0.000000e+00 FALSE FALSE FALSE
#> 34 2.368081e-03 FALSE FALSE FALSE
#> 35 0.000000e+00 TRUE TRUE TRUE
#> 36 0.000000e+00 TRUE TRUE TRUE
#> 37 0.000000e+00 TRUE TRUE TRUE
#> 38 1.061727e-03 TRUE FALSE FALSE
#> 39 3.881734e-03 TRUE FALSE FALSE
#> 40 5.982785e-02 TRUE FALSE FALSE
#> 41 5.952775e-04 FALSE FALSE FALSE
#> 42 0.000000e+00 FALSE FALSE FALSE
#> 43 3.314016e-04 TRUE FALSE FALSE
#> 44 2.953856e-04 TRUE FALSE FALSE
#> 45 0.000000e+00 TRUE TRUE TRUE
#> 46 0.000000e+00 FALSE FALSE FALSE
#> 47 0.000000e+00 FALSE FALSE FALSE
#> 48 0.000000e+00 TRUE TRUE TRUE
#> 49 3.565927e-04 FALSE FALSE FALSE
#> 50 0.000000e+00 FALSE FALSE FALSE
#> 51 1.526441e-04 FALSE FALSE FALSE
#> 52 9.834957e-04 FALSE FALSE FALSE
#> 53 0.000000e+00 TRUE TRUE TRUE
#> 54 1.032316e-03 TRUE FALSE FALSE
#> 55 4.233806e-04 FALSE FALSE FALSE
#> 56 3.401466e-04 FALSE FALSE FALSE
#> 57 0.000000e+00 TRUE TRUE TRUE
#> 58 9.692878e-03 FALSE FALSE FALSE
#> 59 0.000000e+00 FALSE FALSE FALSE
#> 60 9.091426e-05 TRUE FALSE TRUE
#> 61 2.342743e-03 FALSE FALSE FALSE
#> 62 0.000000e+00 TRUE TRUE TRUE
#> 63 0.000000e+00 TRUE TRUE TRUE
#> 64 4.948334e-04 FALSE FALSE FALSE
#> 65 5.876028e-04 FALSE FALSE FALSE
#> 66 0.000000e+00 FALSE FALSE FALSE
#> 67 0.000000e+00 TRUE TRUE TRUE
#> 68 0.000000e+00 TRUE TRUE TRUE
#> 69 0.000000e+00 FALSE FALSE FALSE
#> 70 0.000000e+00 TRUE TRUE TRUE
#> 71 3.920696e-02 TRUE FALSE FALSE
#> 72 9.639821e-04 FALSE FALSE FALSE
#> 73 9.995636e-02 FALSE FALSE FALSE
#> 74 0.000000e+00 TRUE TRUE TRUE
#> 75 0.000000e+00 FALSE FALSE FALSE
#> 76 0.000000e+00 TRUE TRUE TRUE
#> 77 0.000000e+00 TRUE TRUE TRUE
#> 78 0.000000e+00 TRUE TRUE TRUE
#> 79 1.214450e-01 FALSE FALSE FALSE
#> 80 2.140226e-01 FALSE FALSE FALSE
#> 81 0.000000e+00 TRUE TRUE TRUE
#> 82 0.000000e+00 FALSE FALSE FALSE
#> 83 0.000000e+00 TRUE TRUE TRUE
#> 84 0.000000e+00 TRUE TRUE TRUE
#> 85 0.000000e+00 TRUE TRUE TRUE
#> 86 3.615166e-04 FALSE FALSE FALSE
#> 87 0.000000e+00 TRUE TRUE TRUE
#> 88 0.000000e+00 TRUE TRUE TRUE
#> 89 0.000000e+00 TRUE TRUE TRUE
#> 90 0.000000e+00 TRUE TRUE TRUE
#> 91 0.000000e+00 FALSE FALSE FALSE
#> 92 2.819681e-04 FALSE FALSE FALSE
#> 93 0.000000e+00 TRUE TRUE TRUE
#> 94 0.000000e+00 TRUE TRUE TRUE
#> 95 0.000000e+00 FALSE FALSE FALSE
#> 96 0.000000e+00 TRUE TRUE TRUE
#> 97 1.834597e-02 FALSE FALSE FALSE
#> 98 0.000000e+00 TRUE TRUE TRUE
#> 99 0.000000e+00 FALSE FALSE FALSE
#> 100 0.000000e+00 FALSE FALSE FALSE
#> 101 1.265048e-02 FALSE FALSE FALSE
#> 102 0.000000e+00 FALSE FALSE FALSE
#> 103 0.000000e+00 TRUE TRUE TRUE
#> 104 0.000000e+00 FALSE FALSE FALSE
#> 105 0.000000e+00 TRUE TRUE TRUE
#> 106 0.000000e+00 TRUE TRUE TRUE
#> 107 2.566478e-02 TRUE FALSE FALSE
#> 108 0.000000e+00 TRUE TRUE TRUE
#> 109 3.793343e-02 TRUE FALSE FALSE
#> 110 5.405549e-01 FALSE FALSE FALSE
#> 111 0.000000e+00 FALSE FALSE FALSE
#> 112 1.805293e-04 FALSE FALSE FALSE
#> 113 0.000000e+00 FALSE FALSE FALSE
#> 114 0.000000e+00 FALSE FALSE FALSE
#> 115 0.000000e+00 FALSE FALSE FALSE
#> 116 0.000000e+00 FALSE FALSE FALSE
#> 117 0.000000e+00 TRUE TRUE TRUE
#> 118 0.000000e+00 TRUE TRUE TRUE
#> 119 0.000000e+00 FALSE FALSE FALSE
#> 120 0.000000e+00 FALSE FALSE FALSE
#> 121 0.000000e+00 FALSE FALSE FALSE
#> 122 2.324680e-04 FALSE FALSE FALSE
#> 123 0.000000e+00 FALSE FALSE FALSE
#> 124 0.000000e+00 FALSE FALSE FALSE
#> 125 0.000000e+00 FALSE FALSE FALSE
#> 126 0.000000e+00 FALSE FALSE FALSE
#> 127 0.000000e+00 TRUE TRUE TRUE
#> 128 5.613681e-03 TRUE FALSE FALSE
#> 129 1.739233e-03 TRUE FALSE FALSE
#> 130 0.000000e+00 FALSE FALSE FALSE
#> 131 5.528025e-04 FALSE FALSE FALSE
#> 132 0.000000e+00 FALSE FALSE FALSE
#> 133 0.000000e+00 FALSE FALSE FALSE
#> 134 0.000000e+00 FALSE FALSE FALSE
#> 135 4.605775e-03 FALSE FALSE FALSE
#> 136 1.818411e-01 FALSE FALSE FALSE
#> 137 7.539293e-02 TRUE FALSE FALSE
#> 138 0.000000e+00 TRUE TRUE TRUE
#> 139 1.419265e-04 FALSE FALSE FALSE
#> 140 8.006049e-04 FALSE FALSE FALSE
#> 141 0.000000e+00 TRUE TRUE TRUE
#> 142 1.934727e-04 FALSE FALSE FALSE
#> 143 0.000000e+00 TRUE TRUE TRUE
#> 144 0.000000e+00 TRUE TRUE TRUE
#> 145 0.000000e+00 FALSE FALSE FALSE
#> 146 0.000000e+00 TRUE TRUE TRUE
#> 147 4.544498e-03 FALSE FALSE FALSE
#> 148 1.222097e-01 FALSE FALSE FALSE
#> 149 0.000000e+00 TRUE TRUE TRUE
#> 150 0.000000e+00 TRUE TRUE TRUE
#> 151 0.000000e+00 TRUE TRUE TRUE
#> 152 0.000000e+00 FALSE FALSE FALSE
#> 153 3.433582e-04 TRUE FALSE FALSE
#> 154 3.666286e-04 FALSE FALSE FALSE
#> 155 6.114546e-04 FALSE FALSE FALSE
#> 156 0.000000e+00 TRUE TRUE TRUE
#> 157 1.288285e-03 FALSE FALSE FALSE
#> 158 0.000000e+00 TRUE TRUE TRUE
#> 159 0.000000e+00 TRUE TRUE TRUE
#> 160 0.000000e+00 TRUE TRUE TRUE
#> 161 0.000000e+00 TRUE TRUE TRUE
#> 162 0.000000e+00 FALSE FALSE FALSE
#> raw_count richness shannon invsimpson
#> 1 56792 88 2.153433 5.642725
#> 2 35094 89 2.010482 3.255848
#> 3 50052 92 2.313394 5.991114
#> 4 42596 116 2.556608 6.930950
#> 5 85519 106 2.345237 5.789557
#> 6 70268 161 2.341437 4.051853
#> 7 52090 96 1.452460 1.862109
#> 8 54075 92 2.573755 8.445344
#> 9 51693 103 1.946426 3.574197
#> 10 25517 99 2.762789 9.372035
#> 11 66594 109 2.304615 4.861717
#> 12 107364 115 2.085041 5.069044
#> 13 29103 83 2.610991 8.052769
#> 14 127568 153 2.651190 6.766775
#> 15 44555 131 3.098712 10.769495
#> 16 96293 90 1.768492 3.323680
#> 17 53943 101 1.752296 3.197930
#> 18 61275 129 2.301575 3.658643
#> 19 55156 77 2.398226 7.921531
#> 20 92205 61 1.514355 3.213147
#> 21 100414 68 1.558421 3.139064
#> 22 44403 59 1.522436 2.431314
#> 23 60550 65 1.598898 2.753033
#> 24 57975 65 1.541798 2.251030
#> 25 26000 81 2.198707 4.348543
#> 26 23638 74 2.575201 7.418115
#> 27 70531 141 2.558644 5.182404
#> 28 114772 117 1.390062 1.887378
#> 29 117852 109 1.647392 3.247214
#> 30 103530 126 2.518675 6.154125
#> 31 159214 114 1.862766 3.776559
#> 32 72523 93 1.832609 3.403918
#> 33 49524 63 2.113848 5.106784
#> 34 150398 127 1.962830 3.250105
#> 35 46829 126 2.466678 6.305792
#> 36 25289 109 2.984704 10.500101
#> 37 78520 134 2.728416 7.571191
#> 38 94886 135 2.458813 7.154550
#> 39 59533 135 2.558063 6.442152
#> 40 14424 73 2.373343 4.518265
#> 41 75550 114 2.416444 6.242318
#> 42 21536 29 1.514916 3.317566
#> 43 109225 69 1.602980 2.424974
#> 44 124012 130 1.639556 2.310273
#> 45 65389 147 2.500037 5.458671
#> 46 68190 71 1.572326 2.988710
#> 47 94920 131 2.575501 6.049414
#> 48 93476 101 1.854711 3.684141
#> 49 70083 150 2.318448 3.979918
#> 50 46976 86 2.095955 3.973083
#> 51 104803 139 2.557721 6.215693
#> 52 65010 137 2.358371 3.904012
#> 53 21712 145 3.466030 16.541237
#> 54 66509 129 2.463312 6.637095
#> 55 80272 175 3.053075 10.396626
#> 56 96984 206 2.777841 5.440807
#> 57 16555 52 1.796011 3.020966
#> 58 118822 107 1.707368 2.691869
#> 59 71485 72 1.899511 4.607572
#> 60 146156 123 1.135382 1.643594
#> 61 139253 147 2.509818 7.873239
#> 62 46246 114 2.821021 9.650663
#> 63 85658 92 1.732665 2.665200
#> 64 103014 180 2.583387 6.203608
#> 65 79939 137 1.986289 3.092920
#> 66 85884 132 2.026281 3.007894
#> 67 24009 86 2.688169 8.560356
#> 68 65809 120 2.655521 8.448870
#> 69 71949 133 2.385226 4.220238
#> 70 58415 120 2.488638 6.101319
#> 71 66787 111 2.421314 6.946012
#> 72 68400 100 1.728315 2.790616
#> 73 66002 134 2.668614 8.146460
#> 74 66590 92 2.310277 5.247890
#> 75 46289 72 1.762806 3.174792
#> 76 84307 143 2.695193 7.972655
#> 77 54384 159 2.756660 7.990911
#> 78 49864 112 2.219509 4.039626
#> 79 69629 118 2.359181 4.557221
#> 80 37128 105 2.759988 8.795219
#> 81 36039 99 2.050288 3.608854
#> 82 41086 142 2.956628 9.222725
#> 83 65109 88 2.150016 5.182877
#> 84 53362 58 1.744124 2.965370
#> 85 33323 139 2.774747 4.966373
#> 86 44242 106 1.819239 2.518617
#> 87 86935 102 1.845723 2.864478
#> 88 77980 110 2.528470 5.216023
#> 89 69600 142 3.242399 12.658888
#> 90 40865 129 3.024593 9.622248
#> 91 65271 72 1.485296 2.186816
#> 92 56728 99 2.363015 6.651952
#> 93 43048 125 2.312106 4.722848
#> 94 62948 111 1.881901 3.514578
#> 95 68220 143 2.762740 6.676584
#> 96 71903 93 1.191413 1.633253
#> 97 57628 134 2.506380 5.689434
#> 98 68536 94 1.762455 3.185332
#> 99 59064 122 2.244709 3.228133
#> 100 75151 76 1.237181 1.926689
#> 101 54868 109 2.049756 3.112191
#> 102 64214 113 2.402765 5.524994
#> 103 73600 138 2.328157 5.399680
#> 104 87706 91 1.348103 1.939790
#> 105 54910 149 2.994047 9.322615
#> 106 12661 62 1.535866 2.125492
#> 107 79897 140 2.708105 7.665384
#> 108 74025 84 1.132858 1.643401
#> 109 13306 64 2.694278 8.726761
#> 110 49281 117 2.895546 8.795751
#> 111 59189 115 1.897969 3.300783
#> 112 83074 98 1.730608 3.160816
#> 113 62143 94 1.883249 3.704288
#> 114 100948 88 2.303101 6.323346
#> 115 61823 151 2.886861 10.443703
#> 116 55780 106 2.076854 3.806199
#> 117 67947 95 1.513725 1.979928
#> 118 72462 83 1.092397 1.715881
#> 119 62568 88 1.736633 2.607348
#> 120 65700 112 2.300691 4.367626
#> 121 66024 117 1.850855 3.111033
#> 122 64510 118 1.954688 3.415014
#> 123 35562 74 2.322564 4.569763
#> 124 19815 81 2.625129 7.434818
#> 125 93912 99 1.388232 1.929344
#> 126 95057 98 1.183106 1.723783
#> 127 62182 97 1.545273 1.964611
#> 128 64910 65 1.479088 2.999758
#> 129 81724 113 1.854795 3.191583
#> 130 68448 110 2.017750 3.822747
#> 131 50623 131 2.370888 3.502136
#> 132 76937 127 2.098633 3.913458
#> 133 76768 144 2.553445 5.242416
#> 134 119130 129 2.143971 4.757131
#> 135 47330 85 2.443231 7.341955
#> 136 61753 85 1.091612 1.619210
#> 137 74052 137 2.440327 5.019960
#> 138 78895 231 3.630423 16.289708
#> 139 70449 126 2.327186 6.167252
#> 140 89860 164 2.095345 2.832785
#> 141 17284 71 2.503204 4.558187
#> 142 82683 146 1.671330 2.164197
#> 143 39768 97 2.593612 6.485594
#> 144 91264 154 2.982277 10.525227
#> 145 66945 153 3.025423 10.841875
#> 146 13041 56 2.245492 4.869926
#> 147 137123 168 2.737981 6.583945
#> 148 150606 130 1.931330 3.145668
#> 149 98788 133 1.838470 2.605106
#> 150 24532 92 2.400227 5.721092
#> 151 114464 119 2.417572 7.005522
#> 152 31313 70 1.784414 2.933062
#> 153 111246 136 2.351109 5.675295
#> 154 73617 111 2.201648 4.072389
#> 155 102970 91 2.255857 6.443645
#> 156 23315 94 3.082426 12.354025
#> 157 72096 112 2.254604 4.195193
#> 158 139998 110 1.379623 2.043255
#> 159 100690 90 1.893362 3.779776
#> 160 14975 67 2.743060 8.426671
#> 161 48827 81 2.062135 3.716522
#> 162 38071 120 2.975275 10.626657
plotted_factors <- c('Organism' = 'organism', 'Flooded' = 'flooded', 'Pathogen Concentration' = 'path_conc', 'Trial' = 'experiment')
# Reformat data for plotting
alpha_plot_data <- plotted_factors %>%
map2_dfr(names(plotted_factors),
function(factor, factor_name) {
out <- metadata
out$factor <- factor_name
out$value <- as.character(metadata[[factor]])
return(out)
}) %>%
mutate(path_conc = factor(path_conc,
levels = sort(unique(path_conc)),
labels = paste(sort(unique(path_conc)), 'CFU/g'),
ordered = TRUE)) %>%
filter(sample_type == 'Sample') %>%
select(sample_name, factor, value, invsimpson) %>%
tidyr::gather(key = "index", value = "diversity", -sample_name, -factor, -value) %>%
mutate(value = forcats::fct_relevel(ordered(value), "Control", "Cin", "Plu", "Cry"))
# ANOVA and Tukey's HSD
anova_and_hsd <- function(x) {
anova_result <- aov(diversity ~ value, x)
tukey_result <- agricolae::HSD.test(anova_result, "value", group = TRUE)
group_data <- tukey_result$groups[order(rownames(tukey_result$groups)),]
group_key <- setNames(group_data$groups, rownames(group_data))
group_key[as.character(x$value)]
}
alpha_plot_data$group <- unlist(map(split(alpha_plot_data, alpha_plot_data$factor)[unique(alpha_plot_data$factor)], anova_and_hsd))
alpha_subplot <- ggplot(alpha_plot_data, aes(x = value, y = diversity)) +
geom_boxplot() +
geom_text(aes(x = value,
y = max(diversity) + 2,
label = group),
col = 'black',
size = 5) +
facet_grid( ~ factor, scales = "free") +
labs(x = NULL, y = 'Diversity (Inverse Simpson)') +
guides(color = "none") +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position="bottom")set.seed(1)
prob_table <- obj$data$prop[metadata$sample_name]
nmds_plot_data <- function(prob_table) {
metadata <- metadata[metadata$sample_name %in% colnames(prob_table), ]
set.seed(1)
nmds_results <- vegan::metaMDS(t(prob_table), trymax = 1000, k = 2, trace = 0)
nmds_data <- nmds_results$points %>%
as_tibble() %>%
bind_cols(metadata)
names(nmds_data)[1:2] <- paste0("NMDS", 1:2)
return(nmds_data)
}
nmds_data <- nmds_plot_data(prob_table[! is.na(metadata$organism) & metadata$valid_inoc])
nmds_factors <- c(Flooded = 'flooded', Organism = 'organism', 'Pathogen CFU/g' = 'path_conc', 'Trial' = 'experiment')
make_one_plot <- function(factor, name) {
nmds_data %>%
mutate(factor = as.character(nmds_data[[factor]]),
NMDS1 = scales::rescale(NMDS1),
NMDS2 = scales::rescale(NMDS2)) %>%
mutate(factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu", "Cry")) %>%
ggplot(aes_string(x = "NMDS1", y = "NMDS2", color = "factor", label = "sample_name")) +
# geom_label(size = 2) +
geom_point() +
coord_fixed() +
viridis::scale_color_viridis(discrete = TRUE, end = .9) +
labs(color = NULL, x = NULL, y = NULL) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_text(size = 7),
axis.ticks = element_blank(),
plot.margin = unit(rep(0.04, 4), "cm"),
# panel.background = element_rect(fill = 'transparent', colour = NA),
# plot.background = element_rect(fill = "white", colour = NA),
legend.position = "bottom",
legend.text = element_text(size = 10),
legend.key.height = unit(0.5, 'cm'),
legend.key.width = unit(0.2, 'cm'))
}
nmds_subplots <- map2(nmds_factors, names(nmds_factors), make_one_plot)
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
#> "Cry")`.
#> Caused by warning:
#> ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
#> Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
#> ℹ Please use tidy evaluation idioms with `aes()`.
#> ℹ See also `vignette("ggplot2-in-packages")` for more information.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
#> "Cry")`.
#> Caused by warning:
#> ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
#> There was 1 warning in `mutate()`.
#> ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
#> "Cry")`.
#> Caused by warning:
#> ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
nmds_plot <- ggpubr::ggarrange(plotlist = c(list(ggplot() + theme_void()), nmds_subplots),
nrow = 1, widths = c(0.15, 1, 1, 1, 1))
ggsave(nmds_plot, path = "standard_workflow/combined_figures", filename = "nmds.pdf",
width = 7, height = 8)
print(nmds_plot)
combined_div_plot <- ggpubr::ggarrange(alpha_subplot, nmds_plot, ncol = 1, labels = c('A', 'B'),
heights = c(1, 1))
combined_div_plot
ggsave(combined_div_plot, path = "standard_workflow/combined_figures", filename = "diversity.pdf",
width = 10, height = 6, bg = "#FFFFFF")
ggsave(combined_div_plot, path = "standard_workflow/combined_figures", filename = "diversity.svg",
width = 10, height = 6, bg = "#FFFFFF")dist_matrix <- vegdist(t(prob_table), method = "bray")
adonis2_full <- adonis2(dist_matrix ~ organism + flooded + path_conc + experiment,
data = metadata,
permutations = 999,
method = "bray")
print(adonis2_full)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#>
#> adonis2(formula = dist_matrix ~ organism + flooded + path_conc + experiment, data = metadata, permutations = 999, method = "bray")
#> Df SumOfSqs R2 F Pr(>F)
#> organism 3 1.252 0.02381 1.3885 0.061 .
#> flooded 1 1.848 0.03514 6.1478 0.001 ***
#> path_conc 1 0.276 0.00524 0.9170 0.500
#> experiment 1 2.616 0.04974 8.7009 0.001 ***
#> Residual 155 46.599 0.88606
#> Total 161 52.592 1.00000
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2_org <- adonis2(dist_matrix ~ organism,
data = metadata,
permutations = 999,
method = "bray")
print(adonis2_org)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#>
#> adonis2(formula = dist_matrix ~ organism, data = metadata, permutations = 999, method = "bray")
#> Df SumOfSqs R2 F Pr(>F)
#> organism 3 1.252 0.02381 1.2847 0.114
#> Residual 158 51.339 0.97619
#> Total 161 52.592 1.00000
adonis2_flooded <- adonis2(dist_matrix ~ flooded,
data = metadata,
permutations = 999,
method = "bray")
print(adonis2_flooded)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#>
#> adonis2(formula = dist_matrix ~ flooded, data = metadata, permutations = 999, method = "bray")
#> Df SumOfSqs R2 F Pr(>F)
#> flooded 1 1.849 0.03516 5.8301 0.001 ***
#> Residual 160 50.743 0.96484
#> Total 161 52.592 1.00000
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2_path_conc <- adonis2(dist_matrix ~ path_conc,
data = metadata,
permutations = 999,
method = "bray")
print(adonis2_path_conc)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#>
#> adonis2(formula = dist_matrix ~ path_conc, data = metadata, permutations = 999, method = "bray")
#> Df SumOfSqs R2 F Pr(>F)
#> path_conc 1 0.308 0.00586 0.9434 0.487
#> Residual 160 52.283 0.99414
#> Total 161 52.592 1.00000
adonis2_experiment <- adonis2(dist_matrix ~ experiment,
data = metadata,
permutations = 999,
method = "bray")
print(adonis2_experiment)
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#>
#> adonis2(formula = dist_matrix ~ experiment, data = metadata, permutations = 999, method = "bray")
#> Df SumOfSqs R2 F Pr(>F)
#> experiment 1 2.615 0.04972 8.3709 0.001 ***
#> Residual 160 49.977 0.95028
#> Total 161 52.592 1.00000
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#Make sure our factors are input properly as factors
#Make sure our factors are input properly as factors
metadata$path_conc <- as.factor(metadata$path_conc)
metadata$path_conc<- factor(metadata$path_conc, levels = c("0", "1", "100"))
metadata$organism <- as.factor(metadata$organism)
metadata$organism<- factor(metadata$organism, levels = c("Control", "Cin", "Cry", "Plu"))
metadata$flooded <- as.factor(metadata$flooded)
metadata$flooded <- factor(metadata$flooded, levels = c("TRUE", "FALSE"))
metadata$experiment <- as.factor(metadata$experiment)
metadata$experiment<- factor(metadata$experiment, levels = c("1", "2"))
abundance$is_low_abund <- rowSums(abundance[, metadata$sample_name]) < 5
obj2 <- parse_tax_data(abundance, class_cols = 'dada2_tax', class_sep = ';',
class_regex = '^(.+)--(.+)--(.+)$',
class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj2$data) <- c('abund', 'score')
obj2 <- transmute_obs(obj2, 'score', sequence = sequence[input_index], boot = boot, rank = rank)
print(obj2)
#> <Taxmap>
#> 1572 taxa: aab. Eukaryota, aac. Fungi ... cim. NA
#> 1572 edges: NA->aab, aab->aac, aab->aad ... cik->cil, cil->cim
#> 2 data sets:
#> abund:
#> # A tibble: 2,697 × 179
#> taxon_id sequence dada2_tax is_inoculum inoculum S1 S10
#> <chr> <chr> <chr> <lgl> <chr> <int> <int>
#> 1 bfz AAAAAGTCG… Eukaryot… FALSE <NA> 0 0
#> 2 bga AAAAAGTCG… Eukaryot… FALSE <NA> 0 0
#> 3 bgb AAAAAGTCG… Eukaryot… FALSE <NA> 0 0
#> # ℹ 2,694 more rows
#> # ℹ 172 more variables: S100 <int>, S101 <int>, S102 <int>,
#> # S103 <int>, S104 <int>, S105 <int>, S106 <int>, S107 <int>,
#> # S108 <int>, S109 <int>, …
#> score:
#> # A tibble: 23,356 × 4
#> taxon_id sequence boot rank
#> <chr> <chr> <chr> <chr>
#> 1 aab AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100 Doma…
#> 2 aac AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100 King…
#> 3 aae AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100 Phyl…
#> # ℹ 23,353 more rows
#> 0 functions:
obj2$data$tax_abund <- calc_taxon_abund(obj2, data = "abund", cols = metadata$sample_name)
#> Summing per-taxon counts from 162 columns for 1572 taxa
obj2$data$abund_prop <- calc_obs_props(obj2, data = "abund", cols = metadata$sample_name, groups = rep("tax_prop", nrow(metadata)))
#> Calculating proportions from counts for 162 columns in 1 groups for 2697 observations.
obj2$data$tax_prop <- calc_taxon_abund(obj2, data = "abund_prop", cols = "tax_prop")
#> Summing per-taxon counts from 1 columns for 1572 taxa
obj2$data$abund_prop <- NULL
obj_subset <- obj2 %>%
filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE))
min_bootstrap <- 0
obj_subset$data$score$boot <- as.numeric(obj_subset$data$score$boot)
max_boot <- obj_subset$data$score %>%
group_by(taxon_id) %>%
summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_subset <- filter_taxa(obj_subset, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))#> [1] 1.9483e-11 1.0000e+00
#> [1] -9.825868 29.147210
#> [1] 3.732973e-21 9.956213e-01
#> [1] -24.79712 5.11938
#> [1] 2.67702e-46 1.00000e+00
#> [1] -42.21227 41.73490
#> [1] 1.147702e-192 9.859411e-01
#> [1] -29.43794 25.78321
#> quartz_off_screen
#> 2
obj_subset <- obj2 %>%
filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE))
min_bootstrap <- 60
obj_subset$data$score$boot <- as.numeric(obj_subset$data$score$boot)
max_boot <- obj_subset$data$score %>%
group_by(taxon_id) %>%
summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_subset <- filter_taxa(obj_subset, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))
obj_subset$data$asv_prop <- calc_obs_props(obj_subset, 'abund', cols = metadata$sample_name)
#> Calculating proportions from counts for 162 columns for 2697 observations.
obj_subset$data$tax_abund <- calc_taxon_abund(obj_subset, 'abund', cols = metadata$sample_name)
#> Summing per-taxon counts from 162 columns for 1135 taxa
obj_subset$data$tax_prop <- calc_taxon_abund(obj_subset, 'asv_prop', cols = metadata$sample_name)
#> Summing per-taxon counts from 162 columns for 1135 taxa
obj_subset$data$tax_data <- calc_n_samples(obj_subset, 'tax_prop', cols = metadata$sample_name[metadata$sample_type == 'Sample'])
#> Calculating number of samples with a value greater than 0 for 162 columns for 1135 observations
obj_subset$data$tax_data$mean_prop <- rowMeans(obj_subset$data$tax_prop[, metadata$sample_name])
# Calculate the relative standard deviation for each taxon as a measure of how consistently it was found.
rsd <- function(x, na.rm = FALSE) {sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)}
obj_subset$data$tax_data$rel_stand_dev <- map_dbl(1:nrow(obj_subset$data$tax_abund), function(i) {
rsd(unlist(obj_subset$data$tax_abund[i, metadata$sample_name]), na.rm = TRUE)
})
obj_subset
#> <Taxmap>
#> 1135 taxa: aab. Eukaryota ... cim. NA
#> 1135 edges: NA->aab, aab->aac, aab->aad ... cik->cil, cil->cim
#> 6 data sets:
#> abund:
#> # A tibble: 2,697 × 179
#> taxon_id sequence dada2_tax is_inoculum inoculum S1 S10
#> <chr> <chr> <chr> <lgl> <chr> <int> <int>
#> 1 bfz AAAAAGTCG… Eukaryot… FALSE <NA> 0 0
#> 2 bga AAAAAGTCG… Eukaryot… FALSE <NA> 0 0
#> 3 bgb AAAAAGTCG… Eukaryot… FALSE <NA> 0 0
#> # ℹ 2,694 more rows
#> # ℹ 172 more variables: S100 <int>, S101 <int>, S102 <int>,
#> # S103 <int>, S104 <int>, S105 <int>, S106 <int>, S107 <int>,
#> # S108 <int>, S109 <int>, …
#> score:
#> # A tibble: 22,066 × 4
#> taxon_id sequence boot rank
#> <chr> <chr> <dbl> <chr>
#> 1 aab AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100 Doma…
#> 2 aac AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100 King…
#> 3 aae AAAAAGTCGTAACAAGGTAACCGTAGGTGAACCTGCGGTT… 100 Phyl…
#> # ℹ 22,063 more rows
#> tax_abund:
#> # A tibble: 1,135 × 163
#> taxon_id S1 S10 S100 S101 S102 S103 S104 S105 S106
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 aab 56792 35094 50052 42596 85519 70268 52090 54075 51693
#> 2 aac 27698 12210 11111 13034 18437 19077 10116 11106 12154
#> 3 aad 29094 22884 38941 29562 67082 51191 41974 42969 39539
#> # ℹ 1,132 more rows
#> # ℹ 153 more variables: S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> # S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>,
#> # S114 <dbl>, S115 <dbl>, …
#> tax_prop:
#> # A tibble: 1,135 × 163
#> taxon_id S1 S10 S100 S101 S102 S103 S104 S105 S106
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 aab 1 1 1 1 1 1 1 1 1
#> 2 aac 0.488 0.348 0.222 0.306 0.216 0.271 0.194 0.205 0.235
#> 3 aad 0.512 0.652 0.778 0.694 0.784 0.729 0.806 0.795 0.765
#> # ℹ 1,132 more rows
#> # ℹ 153 more variables: S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> # S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>,
#> # S114 <dbl>, S115 <dbl>, …
#> asv_prop:
#> # A tibble: 2,697 × 163
#> taxon_id S1 S10 S100 S101 S102 S103 S104 S105 S106
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 bfz 0 0 0 0 0 0 0 0 0
#> 2 bga 0 0 0 0 0 0 0 0 0
#> 3 bgb 0 0 0 0 0 0 0 0 0
#> # ℹ 2,694 more rows
#> # ℹ 153 more variables: S107 <dbl>, S108 <dbl>, S109 <dbl>,
#> # S11 <dbl>, S110 <dbl>, S111 <dbl>, S112 <dbl>, S113 <dbl>,
#> # S114 <dbl>, S115 <dbl>, …
#> tax_data:
#> # A tibble: 1,135 × 4
#> taxon_id n_samples mean_prop rel_stand_dev
#> <chr> <int> <dbl> <dbl>
#> 1 aab 162 1 0.444
#> 2 aac 162 0.416 0.652
#> 3 aad 162 0.584 0.711
#> # ℹ 1,132 more rows
#> 0 functions:
set.seed(1000)
obj_subset %>%
filter_taxa(! is_stem) %>%
filter_taxa(n_samples >= 10, supertaxa = TRUE, reassign_obs = FALSE) %>%
filter_taxa(! grepl(x = taxon_names, "_sp$"), reassign_obs = FALSE) %>%
filter_taxa(! grepl(x = taxon_names, "incertae_sedis", ignore.case = TRUE), reassign_obs = FALSE) %>%
filter_taxa(! grepl(x = taxon_names, "NA", ignore.case = TRUE), reassign_obs = FALSE) %>%
remove_redundant_names() %>%
heat_tree(node_size = mean_prop,
edge_size = n_samples,
node_color = ifelse(is.na(rel_stand_dev), 0, rel_stand_dev),
#node_color_range = c("#aaaaaa", "#8da0cb", "#66c2a5", "#a6d854", "#fc8d62", "red"),
node_label = taxon_names,
node_size_range = c(0.008, 0.025),
node_label_size_range = c(0.012, 0.018),
edge_label_size_range = c(0.010, 0.013),
node_size_interval = c(0, 1),
edge_size_range = c(0.001, 0.008),
#layout = "da", initial_layout = "re",
node_color_axis_label = "Relative standard deviation",
node_size_axis_label = "Mean proportion of reads",
edge_size_axis_label = "Number of samples",
node_color_digits = 2,
node_size_digits = 2,
edge_color_digits = 2,
edge_size_digits = 2,
#aspect_ratio = 1.618,
output_file = file.path('standard_workflow/combined_figures', '/heattree_mostabund_taxa.pdf'))sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.3.2 (2023-10-31)
#> os macOS Ventura 13.2.1
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Los_Angeles
#> date 2024-11-22
#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> abind 1.4-5 2016-07-21 [1] CRAN (R 4.3.0)
#> ade4 1.7-22 2023-02-06 [1] CRAN (R 4.3.0)
#> agricolae 1.3-7 2023-10-22 [1] CRAN (R 4.3.1)
#> AlgDesign 1.2.1 2022-05-25 [1] CRAN (R 4.3.0)
#> ape 5.8 2024-04-11 [1] CRAN (R 4.3.1)
#> askpass 1.2.0 2023-09-03 [1] CRAN (R 4.3.0)
#> backports 1.5.0 2024-05-23 [1] CRAN (R 4.3.3)
#> Biobase 2.62.0 2023-10-26 [1] Bioconductor
#> BiocGenerics * 0.48.1 2023-11-02 [1] Bioconductor
#> BiocParallel 1.36.0 2023-10-26 [1] Bioconductor
#> biomformat 1.30.0 2023-10-26 [1] Bioconductor
#> Biostrings * 2.70.3 2024-04-03 [1] bioc_xgit (@c213e35)
#> bit 4.5.0 2024-09-20 [1] CRAN (R 4.3.3)
#> bit64 4.5.2 2024-09-22 [1] CRAN (R 4.3.3)
#> bitops 1.0-8 2024-07-29 [1] CRAN (R 4.3.3)
#> broom 1.0.6 2024-05-17 [1] CRAN (R 4.3.3)
#> bslib 0.8.0 2024-07-29 [1] CRAN (R 4.3.3)
#> cachem 1.1.0 2024-05-16 [1] CRAN (R 4.3.2)
#> car 3.1-2 2023-03-30 [1] CRAN (R 4.3.0)
#> carData 3.0-5 2022-01-06 [1] CRAN (R 4.3.0)
#> cli 3.6.3 2024-06-21 [1] CRAN (R 4.3.2)
#> cluster 2.1.6 2023-12-01 [1] CRAN (R 4.3.1)
#> codetools 0.2-20 2024-03-31 [1] CRAN (R 4.3.1)
#> colorspace 2.1-1 2024-07-26 [1] CRAN (R 4.3.3)
#> cowplot 1.1.3 2024-01-22 [1] CRAN (R 4.3.1)
#> crayon 1.5.3 2024-06-20 [1] CRAN (R 4.3.3)
#> data.table * 1.15.4 2024-03-30 [1] CRAN (R 4.3.1)
#> decontam * 1.22.0 2023-10-26 [1] Bioconductor
#> DelayedArray 0.28.0 2023-11-06 [1] Bioconductor
#> DESeq2 1.42.1 2024-03-09 [1] Bioconductor 3.18 (R 4.3.3)
#> digest 0.6.37 2024-08-19 [1] CRAN (R 4.3.3)
#> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.3.1)
#> evaluate 1.0.1 2024-10-10 [1] CRAN (R 4.3.3)
#> fansi 1.0.6 2023-12-08 [1] CRAN (R 4.3.1)
#> farver 2.1.2 2024-05-13 [1] CRAN (R 4.3.3)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.3.2)
#> forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.0)
#> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.3.0)
#> furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.3.0)
#> future * 1.34.0 2024-07-29 [1] CRAN (R 4.3.3)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
#> GenomeInfoDb * 1.38.8 2024-03-16 [1] Bioconductor 3.18 (R 4.3.3)
#> GenomeInfoDbData 1.2.11 2023-11-14 [1] Bioconductor
#> GenomicRanges 1.54.1 2023-10-30 [1] Bioconductor
#> ggfittext 0.10.2 2024-02-01 [1] CRAN (R 4.3.1)
#> ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.3.1)
#> ggpubr 0.6.0 2023-02-10 [1] CRAN (R 4.3.0)
#> ggsignif 0.6.4 2022-10-13 [1] CRAN (R 4.3.0)
#> globals 0.16.3 2024-03-08 [1] CRAN (R 4.3.1)
#> glue 1.8.0 2024-09-30 [1] CRAN (R 4.3.3)
#> gridExtra 2.3 2017-09-09 [1] CRAN (R 4.3.0)
#> gtable 0.3.5 2024-04-22 [1] CRAN (R 4.3.1)
#> highr 0.11 2024-05-26 [1] CRAN (R 4.3.3)
#> hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.3.1)
#> igraph 2.0.3 2024-03-13 [1] CRAN (R 4.3.1)
#> IRanges * 2.36.0 2023-10-26 [1] Bioconductor
#> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.3.0)
#> jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.0)
#> jsonlite 1.8.8 2023-12-04 [1] CRAN (R 4.3.1)
#> knitr 1.48 2024-07-07 [1] CRAN (R 4.3.3)
#> labeling 0.4.3 2023-08-29 [1] CRAN (R 4.3.0)
#> lattice * 0.22-6 2024-03-20 [1] CRAN (R 4.3.1)
#> lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.3.0)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.1)
#> listenv 0.9.1 2024-01-29 [1] CRAN (R 4.3.1)
#> locfit 1.5-9.10 2024-06-24 [1] CRAN (R 4.3.3)
#> magick * 2.8.4 2024-07-14 [1] CRAN (R 4.3.3)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
#> MASS 7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.1)
#> Matrix 1.6-5 2024-01-11 [1] CRAN (R 4.3.1)
#> MatrixGenerics 1.14.0 2023-10-26 [1] Bioconductor
#> matrixStats 1.3.0 2024-04-11 [1] CRAN (R 4.3.1)
#> metacoder * 0.3.7 2024-02-20 [1] CRAN (R 4.3.1)
#> mgcv 1.9-1 2023-12-21 [1] CRAN (R 4.3.1)
#> multtest 2.58.0 2023-10-26 [1] Bioconductor
#> munsell 0.5.1 2024-04-01 [1] CRAN (R 4.3.1)
#> nlme 3.1-165 2024-06-06 [1] CRAN (R 4.3.3)
#> parallelly 1.38.0 2024-07-27 [1] CRAN (R 4.3.3)
#> pdftools * 3.4.1 2024-09-20 [1] CRAN (R 4.3.3)
#> permute * 0.9-7 2022-01-27 [1] CRAN (R 4.3.0)
#> phyloseq * 1.46.0 2024-04-03 [1] bioc_xgit (@7320133)
#> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
#> plyr 1.8.9 2023-10-02 [1] CRAN (R 4.3.1)
#> purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.3.0)
#> qpdf 1.3.4 2024-10-04 [1] CRAN (R 4.3.3)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.0)
#> ragg 1.3.2 2024-05-15 [1] CRAN (R 4.3.3)
#> Rcpp 1.0.13 2024-07-17 [1] CRAN (R 4.3.2)
#> RCurl 1.98-1.16 2024-07-11 [1] CRAN (R 4.3.3)
#> readr * 2.1.5 2024-01-10 [1] CRAN (R 4.3.1)
#> reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.3.0)
#> rhdf5 2.46.1 2023-12-02 [1] Bioconductor 3.18 (R 4.3.2)
#> rhdf5filters 1.14.1 2023-11-06 [1] Bioconductor
#> Rhdf5lib 1.24.2 2024-02-10 [1] Bioconductor 3.18 (R 4.3.2)
#> rlang 1.1.4 2024-06-04 [1] CRAN (R 4.3.2)
#> rmarkdown 2.27 2024-05-17 [1] CRAN (R 4.3.3)
#> rstatix 0.7.2 2023-02-01 [1] CRAN (R 4.3.0)
#> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.3.1)
#> S4Arrays 1.2.1 2024-03-05 [1] Bioconductor 3.18 (R 4.3.2)
#> S4Vectors * 0.40.2 2023-11-25 [1] Bioconductor 3.18 (R 4.3.2)
#> sass 0.4.9 2024-03-15 [1] CRAN (R 4.3.1)
#> scales 1.3.0 2023-11-28 [1] CRAN (R 4.3.1)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.0)
#> SparseArray 1.2.4 2024-02-10 [1] Bioconductor 3.18 (R 4.3.2)
#> stringi 1.8.4 2024-05-06 [1] CRAN (R 4.3.2)
#> stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.3.1)
#> SummarizedExperiment 1.32.0 2023-11-06 [1] Bioconductor
#> survival 3.7-0 2024-06-05 [1] CRAN (R 4.3.3)
#> svglite 2.1.3 2023-12-08 [1] CRAN (R 4.3.1)
#> systemfonts 1.1.0 2024-05-15 [1] CRAN (R 4.3.3)
#> textshaping 0.4.0 2024-05-24 [1] CRAN (R 4.3.3)
#> tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)
#> tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.3.1)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.3.1)
#> tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.0)
#> utf8 1.2.4 2023-10-22 [1] CRAN (R 4.3.1)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.1)
#> vegan * 2.6-6.1 2024-05-21 [1] CRAN (R 4.3.3)
#> viridis 0.6.5 2024-01-29 [1] CRAN (R 4.3.1)
#> viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.3.0)
#> vroom 1.6.5 2023-12-05 [1] CRAN (R 4.3.1)
#> withr 3.0.1 2024-07-31 [1] CRAN (R 4.3.2)
#> xfun 0.48 2024-10-03 [1] CRAN (R 4.3.3)
#> XVector * 0.42.0 2023-10-26 [1] Bioconductor
#> yaml 2.3.10 2024-07-26 [1] CRAN (R 4.3.3)
#> zlibbioc 1.48.2 2024-03-19 [1] Bioconductor 3.18 (R 4.3.3)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────